import numpy as np
import pandas as pd
import os.path
import os
import time
import tensorflow as tf
from midas import Midas

from sklearn.preprocessing import MinMaxScaler

# Prefix prepended to every input/output location used by this script; the
# empty string leaves the paths relative to the current working directory.
path = ''

data_0 = pd.read_csv(path + "data/cces_format.csv")

# Index.str.strip() returns a new Index rather than modifying in place, so the
# result has to be assigned back for the header clean-up to take effect.
data_0.columns = data_0.columns.str.strip()

# Variables that are one-hot encoded below instead of being used as-is.
categorical = ["gender", "sexuality", "educ", "race", "employ", "religpew",
               "inputstate", "CC18_317", "pid3"]

# Split the categorical variables off; data_0 keeps only the remaining columns.
data_1 = data_0[categorical]
data_0.drop(categorical, axis=1, inplace=True)

# Frames to concatenate column-wise: the remaining columns first, followed by
# one block of dummy columns per categorical variable.
constructor_list = [data_0]
# One list of dummy-column names per categorical variable (later passed to the
# imputer as `softmax_columns`).
columns_list = []

for column in data_1.columns:
    na_temp = data_1[column].isnull()
    # Ask for float dummies so NaN can be stored without a dtype upcast
    # (get_dummies yields bool columns by default on recent pandas versions).
    temp = pd.get_dummies(data_1[column], prefix=column, dtype=float)
    # get_dummies encodes a missing value as an all-zero row; restore the
    # missingness so those rows are treated as unknown rather than "no category".
    temp.loc[na_temp, :] = np.nan
    constructor_list.append(temp)
    columns_list.append(list(temp.columns.values))

data_0 = pd.concat(constructor_list, axis=1)

# Write NaN into every null position -- presumably so that any other missing
# marker (e.g. None) is stored uniformly as NaN before scaling.
na_loc = data_0.isnull()
data_0[na_loc] = np.nan

# Fit a MinMaxScaler on the assembled data and rebuild a DataFrame from the
# scaled array so the column labels are preserved. The fitted scaler stays in
# scope because the export step calls its inverse_transform.
scaler = MinMaxScaler()
data_0_scaled = pd.DataFrame(
    scaler.fit_transform(data_0),
    columns=data_0.columns,
)

# Set up the MIDAS imputer with two layers of 256 units, the VAE layer turned
# off, and a fixed seed.
imputer = Midas(layer_structure=[256, 256], vae_layer=False, seed=89)

# columns_list holds, per categorical variable, the names of its dummy columns;
# it is handed to the model as the softmax column groups.
imputer.build_model(data_0_scaled, softmax_columns=columns_list)
imputer.train_model(training_epochs=200)

print("Saving datasets...")

# Request m=15 samples from the trained model; every entry of output_list is
# written out below as its own CSV file.
imputations = imputer.generate_samples(m=15).output_list

# Make sure the target directory exists before writing, so a missing folder
# does not abort the script after the (long) training run has completed.
out_dir = path + "application/data_tmp/"
os.makedirs(out_dir, exist_ok=True)

# Files are numbered from 1: cces_mid_1.csv, cces_mid_2.csv, ...
for n, imputation in enumerate(imputations, start=1):
    # Undo the min-max scaling so the values are back in the original units.
    out_file = scaler.inverse_transform(imputation)
    out_file = pd.DataFrame(out_file, columns=data_0.columns)
    file_name = out_dir + "cces_mid_" + str(n) + ".csv"
    out_file.to_csv(file_name, index=False)

print("Datasets saved!")
